?
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(Seurat)
library(ggplot2)
packageVersion("dplyr")
[1] '0.8.99.9003'
packageVersion("Seurat")
[1] '3.1.5'
CITE_GE_HTO_010.data <- Read10X(data.dir = "/home/david.mentrup/ProjectCITE/data/Run01/10X_20_OK_05_R59/cellranger/10X_20_010_GE_HTO/outs/filtered_feature_bc_matrix/")
10X data contains more than one type and is being returned as a list containing matrices of each type.
CITE_GE_HTO_011.data <- Read10X(data.dir = "/home/david.mentrup/ProjectCITE/data/Run01/10X_20_OK_05_R59/cellranger/10X_20_011_GE_HTO/outs/filtered_feature_bc_matrix/")
10X data contains more than one type and is being returned as a list containing matrices of each type.
CITE_GE_Epi_010.data <- Read10X(data.dir = "/home/david.mentrup/ProjectCITE/data/Run01/10X_20_OK_05_R59/cellranger/10X_20_010_GE_Epi/outs/filtered_feature_bc_matrix/", gene.column=1)
10X data contains more than one type and is being returned as a list containing matrices of each type.
# Names for Hashs/Epitopes corrected:
rownames(x = CITE_GE_HTO_010.data[["Antibody Capture"]]) <- c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3")
rownames(x = CITE_GE_HTO_011.data[["Antibody Capture"]]) <- c("HL60-R2-H3")
rownames(x = CITE_GE_Epi_010.data[["Antibody Capture"]]) <- gsub(pattern ="_[control_]*TotalSeqA", replacement = "", x = rownames(x = CITE_GE_Epi_010.data[["Antibody Capture"]]))
CITE_GE_HTO_010 <- CreateSeuratObject(counts = CITE_GE_HTO_010.data[["Gene Expression"]], min.cells = 3, min.features = 0)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
CITE_GE_HTO_010[["HTO"]] <- CreateAssayObject(CITE_GE_HTO_010.data[["Antibody Capture"]])
CITE_GE_HTO_010[["Epi"]] <- CreateAssayObject(CITE_GE_Epi_010.data[["Antibody Capture"]])
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
CITE_GE_HTO_010
An object of class Seurat
19524 features across 7062 samples within 3 assays
Active assay: RNA (19510 features, 0 variable features)
2 other assays present: HTO, Epi
median(CITE_GE_HTO_010$nCount_RNA)
[1] 13242
median(CITE_GE_HTO_010$nFeature_RNA)
[1] 3389.5
median(CITE_GE_HTO_010$nCount_HTO)
[1] 778
median(CITE_GE_HTO_010$nCount_Epi)
[1] 211
This sample is the control experiment without epitope labelling
CITE_GE_HTO_011 <- CreateSeuratObject(counts = CITE_GE_HTO_011.data[["Gene Expression"]], min.cells = 3, min.features = 0)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
CITE_GE_HTO_011[["HTO"]] <- CreateAssayObject(CITE_GE_HTO_011.data[["Antibody Capture"]])
CITE_GE_HTO_011
An object of class Seurat
18003 features across 3769 samples within 2 assays
Active assay: RNA (18002 features, 0 variable features)
1 other assay present: HTO
median(CITE_GE_HTO_011$nCount_RNA)
[1] 14149
median(CITE_GE_HTO_011$nFeature_RNA)
[1] 3628
median(CITE_GE_HTO_011$nCount_HTO)
[1] 40
Standart quality control and filtering
#Mitochondrial genes selected
CITE_GE_HTO_010[["percent.mt"]] <- PercentageFeatureSet(CITE_GE_HTO_010, pattern = "^MT-")
CITE_GE_HTO_011[["percent.mt"]] <- PercentageFeatureSet(CITE_GE_HTO_011, pattern = "^MT-")
# Visualize as a violin plot
# Visualize QC metrics as a violin plot
VlnPlot(CITE_GE_HTO_010, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
VlnPlot(CITE_GE_HTO_011, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
The aggregated correlation of amount of RNA and mitochondrial RNA as well as number of Feature is plotted as a further QC
plot1 <- FeatureScatter(CITE_GE_HTO_010, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(CITE_GE_HTO_010, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
Warning: CombinePlots is being deprecated. Plots should now be combined using
the patchwork system.
plot1 <- FeatureScatter(CITE_GE_HTO_011, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(CITE_GE_HTO_011, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
Warning: CombinePlots is being deprecated. Plots should now be combined using
the patchwork system.
We filter for mitochodrial genes (>20%) as well as all cells below 600 features out as well
CITE_GE_HTO_010 <- subset(CITE_GE_HTO_010, subset = nFeature_RNA > 600 & percent.mt <20)
CITE_GE_HTO_011 <- subset(CITE_GE_HTO_011, subset = nFeature_RNA > 600 & percent.mt <20)
CITE_GE_HTO_010<- NormalizeData(CITE_GE_HTO_010)
CITE_GE_HTO_010<- FindVariableFeatures(CITE_GE_HTO_010)
CITE_GE_HTO_010<- ScaleData(CITE_GE_HTO_010)
Centering and scaling data matrix
CITE_GE_HTO_010<- RunPCA(CITE_GE_HTO_010)
PC_ 1
Positive: IL8, SAT1, THBS1, BTG1, IER3, SLC7A11, KYNU, SOCS3, NAMPT, C15orf48
PLAUR, CXCL3, SOD2, PHLDA1, NFKBIZ, NEU4, IL1B, CREM, CXCL2, PDE4DIP
MXD1, MS4A7, DUSP1, BCL2A1, INHBA, TFPI, POU2F2, LGALS3, ABCA1, PTPRE
Negative: PTMA, TUBA1B, NPM1, TYMS, SNRPD1, DUT, STMN1, HMGB1, UQCRH, RANBP1
KIAA0101, PA2G4, MIF, LDHB, NUCB2, FBL, SNRPF, GCSH, SLC29A1, NME1
PPA1, PPP1R27, PPIA, HSPD1, TIMM13, RPL27A, HSPE1, PAICS, TUBB, RNASEH2B
PC_ 2
Positive: RPS2, RPL14, RPL21, RPL13A, RPS14, RPS5, RPSA, EGFL7, NCOA7, SOX4
SPINK2, RPL4, IL1RL1, GUCY1A3, GNB2L1, SLC40A1, GBP4, SOCS2, TFPI, RPS11
GCSAML, AQP3, FSCN1, IGFBP2, HLA-DPB1, CPA3, HLA-DPA1, NPM1, CYTL1, PEBP1
Negative: EPB41L3, FCER1G, CYBB, CD14, CD93, ANXA5, S100A9, PLA2G7, MMP14, RAB31
PILRA, NCF2, HNMT, MPEG1, S100A8, LYZ, MS4A7, VCAN, NCF1, S100A10
SH3BP5, CXCL16, SERPINB2, SERPINA1, RBM47, CTB-61M7.2, PID1, CD300E, S100A12, CTSL
PC_ 3
Positive: CAMK1, CD4, RNASET2, APP, PLAC8, RP11-1112C15.1, CST7, TGM5, LSP1, NR2F2
AL589743.1, PPP1R27, FAM107B, HOXC10, IGF2BP3, TRIP6, IGF2BP1, C1orf228, MNDA, NRN1
CD48, EVL, LRP3, MPO, LIN7A, MEIS2, RPL13A, S100B, CEBPE, OSBPL1A
Negative: CLSPN, HELLS, MCM10, CCNE2, CHEK1, DTL, PCNA, CPA3, XRCC2, UNG
IGFBP2, FEN1, BRCA1, MCM6, FANCI, HSPA1A, MCM2, ATAD5, CENPU, KIAA0101
MCM5, CDC6, TIMP1, HSPA8, RAD51, RRM1, TK1, NCOA7, CDT1, TYMS
PC_ 4
Positive: CST3, S100A4, ITGB2, ALOX5AP, CAPG, CORO1A, ITM2B, AMICA1, ICAM3, HLA-DMA
HLA-DPA1, SCPEP1, TNNI2, HLA-DPB1, TIMP1, ARHGDIB, NCF1, CD52, S100B, SAMHD1
AIF1, COTL1, C1orf162, ADORA3, TOMM7, PAK1, PLXDC2, SIGLEC7, CD101, HLA-DQB1
Negative: INHBA, NEU4, PHLDA1, TSC22D1, IL11, CXCL3, CXCL2, NAMPT, BTG1, ATP2B1
DDIT3, SAT1, DDIT4, JUN, CTNNAL1, CXCL1, CHST7, LINC00936, NR4A3, CREM
TRAF1, IL24, ATF3, DUSP1, IL8, IRAK2, TNFAIP6, SOCS3, MAFF, MAP4K4
PC_ 5
Positive: YBX3, IL8, CYCS, ATP5B, HSP90AB1, MIR155HG, COX5A, PSMB7, INHBA, SRM
CXCL3, CXCL2, UQCRFS1, PSMB1, GTSF1, UBE2M, YBX1, NOP16, NME1, RPL7L1
EBNA1BP2, TIMM13, SAT1, NAMPT, NEU4, HSPD1, IL1B, TFPI, CXCL1, PSMB2
Negative: GIMAP7, CD3D, CD2, SPOCK2, CXCR4, GIMAP4, CD3G, CD3E, CD7, SELL
PRDX2, CRIP1, SESN3, GZMM, IL7R, ARL4C, EVL, CCR7, CD6, CD48
CLIC3, TRAT1, KLF2, LBH, ABLIM1, LTB, CD27, TSHZ2, CDK1, GNLY
ElbowPlot(CITE_GE_HTO_010, ndims = 50)
CITE_GE_HTO_010 <- RunUMAP(CITE_GE_HTO_010, dims = 1:25)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
16:50:14 UMAP embedding parameters a = 0.9922 b = 1.112
16:50:14 Read 6505 rows and found 25 numeric columns
16:50:14 Using Annoy for neighbor search, n_neighbors = 30
16:50:14 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:50:16 Writing NN index file to temp file /tmp/Rtmp9rQSNO/file1acbf121d7d45
16:50:16 Searching Annoy index using 1 thread, search_k = 3000
16:50:17 Annoy recall = 100%
16:50:18 Commencing smooth kNN distance calibration using 1 thread
16:50:18 Initializing from normalized Laplacian + noise
16:50:19 Commencing optimization for 500 epochs, with 267946 positive edges
16:50:43 Optimization finished
CITE_GE_HTO_010 <- FindNeighbors(CITE_GE_HTO_010, dims = 1:25)
Computing nearest neighbor graph
Computing SNN
CITE_GE_HTO_010 <- FindClusters(CITE_GE_HTO_010, resolution = 0.6, algorithm = 3)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6505
Number of edges: 227543
Running smart local moving algorithm...
Maximum modularity in 10 random starts: 0.8810
Number of communities: 12
Elapsed time: 3 seconds
CITE_GE_HTO_010@meta.data$RNA_cluster <-CITE_GE_HTO_010@active.ident
DimPlot(CITE_GE_HTO_010, label = TRUE, label.size = 10) + NoLegend()
CITE_GE_HTO_011<- NormalizeData(CITE_GE_HTO_011)
CITE_GE_HTO_011<- FindVariableFeatures(CITE_GE_HTO_011)
CITE_GE_HTO_011<- ScaleData(CITE_GE_HTO_011)
Centering and scaling data matrix
CITE_GE_HTO_011<- RunPCA(CITE_GE_HTO_011)
PC_ 1
Positive: IL8, THBS1, SAT1, BTG1, IER3, SLC7A11, NAMPT, KYNU, PLAUR, C15orf48
DUSP1, NFKBIZ, FOSL2, CXCL3, MXD1, SOD2, JUN, MS4A7, PHLDA1, SLC43A2
SOCS3, IL1B, CXCL2, MMP14, SH3BP5, MAFB, CD14, SERPINB2, CTB-61M7.2, ABCA1
Negative: PTMA, NPM1, SNRPD1, DUT, TYMS, TUBA1B, RANBP1, LDHB, STMN1, NME1
KIAA0101, HSPE1, PPA1, UQCRH, PA2G4, FBL, HSPD1, NHP2, GCSH, PAICS
RPS2, RPSA, HMGB1, SRM, HSP90AB1, ATP5G1, SNRPF, RPS14, RPS5, RAN
PC_ 2
Positive: RPL18A, RPL5, RPL13A, RPL36, RPS2, RPL21, RPL23A, RPS15, RPS8, RPL14
RPS5, RPS14, RPSA, RPLP2, SOX4, RPL4, TFPI, RPS28, EGFL7, GNB2L1
GBP4, NCOA7, SLC45A3, GUCY1A3, RPS25, IL1RL1, RPS11, SPINK2, SLC40A1, FSCN1
Negative: EPB41L3, FCER1G, CD93, S100A9, ANXA5, CYBB, CD14, PLA2G7, RAB31, LYZ
NCF2, PILRA, MMP14, S100A10, CTB-61M7.2, MPEG1, S100A8, NCF1, HNMT, MT2A
CD300E, VCAN, SERPINA1, MS4A7, SERPINB2, SH3BP5, CXCL16, PID1, RBM47, CYP27A1
PC_ 3
Positive: CLSPN, GINS2, HELLS, E2F1, MCM10, MCM4, DTL, CCNE2, CENPK, CHEK1
PCNA, CPA3, MCM6, MCM2, XRCC2, CSF2RB, IGFBP2, UNG, HSPA8, SLC45A3
ATAD5, BRCA1, TIMP1, CHAF1A, FEN1, HSPA1A, EXO1, MCM5, CDT1, FANCI
Negative: PPP1R27, NR2F2, APP, CAMK1, CST7, MPO, IGF2BP1, RP11-1112C15.1, CD4, LRP3
AL589743.1, PLAC8, RNASET2, TRIP6, CEBPE, HOXC10, LIN7A, CTSG, PRTN3, C1orf228
TGM5, IGF2BP3, MNDA, AZU1, MEIS2, IGFBP7, CORO2A, NRN1, KCNN2, RNF165
PC_ 4
Positive: IL11, NEU4, TSC22D1, PHLDA1, INHBA, DDIT3, CTNNAL1, ATP2B1, CXCL2, CXCL3
NAMPT, BTG1, ATF3, SAT1, FOSL2, NRIP3, CREM, LINC00936, IL24, DDIT4
USP53, JUN, TUBB4B, RBKS, CDK1, CHST7, CXCL1, NR4A3, ABCA1, TOP2A
Negative: GSN, S100A4, CST3, CORO1A, LST1, AMICA1, TSPO, HLA-DMA, ARHGDIB, ITGB2
HLA-DPA1, ITM2B, HLA-DPB1, CAPG, ALOX5AP, HLA-DQB1, CFP, ICAM3, FGL2, HLA-DMB
AIF1, CD52, SAMHD1, CD9, PLXDC2, SORL1, CD180, NCF1, LAT2, HLA-DQA1
PC_ 5
Positive: YBX3, CXCL1, IL8, CYCS, CXCL2, CXCL3, IL6, HSP90AB1, RPF2, MIR155HG
IL1B, SRM, ITGB8, TXN, MARCKS, CCL20, RPL7L1, IL1A, NME1, PSMB7
EBNA1BP2, NAMPT, CXCL5, TIMM13, CCL3, BCAT1, HSPE1, IL24, NOP16, TNFAIP6
Negative: GIMAP7, SYNE2, GIMAP4, CD3D, CD2, SPOCK2, CXCR4, CLIC3, GZMM, CRIP1
EVL, LCK, CD3E, CD3G, CDK1, GNLY, SAMD3, LSP1, PRDX2, LBH
HMGB2, NCAPG, NDC80, RRM2, ITM2B, UBE2C, SPC25, SLA, TOP2A, KIFC1
ElbowPlot(CITE_GE_HTO_011, ndims = 50)
CITE_GE_HTO_011 <- RunUMAP(CITE_GE_HTO_011, dims = 1:25)
16:50:55 UMAP embedding parameters a = 0.9922 b = 1.112
16:50:55 Read 3515 rows and found 25 numeric columns
16:50:55 Using Annoy for neighbor search, n_neighbors = 30
16:50:55 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:50:56 Writing NN index file to temp file /tmp/Rtmp9rQSNO/file1acbf22a4ebd7
16:50:56 Searching Annoy index using 1 thread, search_k = 3000
16:50:57 Annoy recall = 100%
16:50:57 Commencing smooth kNN distance calibration using 1 thread
16:50:58 Initializing from normalized Laplacian + noise
16:50:58 Commencing optimization for 500 epochs, with 141290 positive edges
16:51:11 Optimization finished
CITE_GE_HTO_011 <- FindNeighbors(CITE_GE_HTO_011, dims = 1:25)
Computing nearest neighbor graph
Computing SNN
CITE_GE_HTO_011 <- FindClusters(CITE_GE_HTO_011, resolution = 0.6, algorithm = 3)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 3515
Number of edges: 117662
Running smart local moving algorithm...
Maximum modularity in 10 random starts: 0.8660
Number of communities: 12
Elapsed time: 1 seconds
CITE_GE_HTO_011@meta.data$RNA_cluster <-CITE_GE_HTO_011@active.ident
DimPlot(CITE_GE_HTO_011, label = TRUE, label.size = 10) + NoLegend()
CITE_GE_HTO_010 <- NormalizeData(CITE_GE_HTO_010, assay = "HTO", normalization.method = "CLR")
Normalizing across features
CITE_GE_HTO_010 <- ScaleData(CITE_GE_HTO_010, assay = "HTO")
Centering and scaling data matrix
The demultiplexing of the Spike-in (HL60) is feasible:
FeaturePlot(CITE_GE_HTO_010, features = c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3"))
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Warning: Could not find Pat1-Ven-R1-H2 in the default search locations, found in
HTO assay instead
Warning: Could not find HL60-R1-H3 in the default search locations, found in HTO
assay instead
VlnPlot(CITE_GE_HTO_010, assay= "HTO",features = c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3"), ncol = 2, pt.size = 0.1) + NoLegend()
VlnPlot(CITE_GE_HTO_010, features = c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3"), ncol = 2, pt.size = 0.1, group.by = "orig.ident") + NoLegend()
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Warning: Could not find Pat1-Ven-R1-H2 in the default search locations, found in
HTO assay instead
Warning: Could not find HL60-R1-H3 in the default search locations, found in HTO
assay instead
FeatureScatter(CITE_GE_HTO_010, feature1 = "hto_Pat1-R1-H1", feature2 = "hto_Pat1-Ven-R1-H2",group.by = "orig.ident")
FeatureScatter(CITE_GE_HTO_010, feature1 = "hto_Pat1-R1-H1", feature2 = "hto_HL60-R1-H3",group.by = "orig.ident") + geom_hline(yintercept=1.5) + NoLegend()
RidgePlot(CITE_GE_HTO_010, features = "hto_HL60-R1-H3", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Picking joint bandwidth of 0.0434
VlnPlot(CITE_GE_HTO_010, features = c("hto_HL60-R1-H3"), ncol = 1, pt.size = 0.1, group.by = "orig.ident", y.max =6) + NoLegend() + geom_hline(yintercept=1.5)
Warning: Removed 13 rows containing non-finite values (stat_ydensity).
Warning: Removed 13 rows containing missing values (geom_point).
RidgePlot(CITE_GE_HTO_010, features = "Pat1-R1-H1", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Picking joint bandwidth of 0.0623
VlnPlot(CITE_GE_HTO_010, features = c("Pat1-R1-H1"), ncol = 1, pt.size = 0.1, group.by = "orig.ident", y.max =2) + NoLegend() + geom_hline(yintercept=0.75)
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Warning: Removed 372 rows containing non-finite values (stat_ydensity).
Warning: Removed 372 rows containing missing values (geom_point).
RidgePlot(CITE_GE_HTO_010, features = "hto_Pat1-Ven-R1-H2", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Picking joint bandwidth of 0.0773
VlnPlot(CITE_GE_HTO_010, features = c("hto_Pat1-Ven-R1-H2"), ncol = 1, pt.size = 0.1, group.by = "orig.ident", y.max =2) + NoLegend() + geom_hline(yintercept=0.55)
Warning: Removed 389 rows containing non-finite values (stat_ydensity).
Warning: Removed 389 rows containing missing values (geom_point).
This is a preliminary demultiplexing trial, final demultiplexing was conducted after a restrictive removal of the cell line
DefaultAssay(object = CITE_GE_HTO_010) <- "HTO"
CITE_GE_HTO_010@meta.data$Sample <- 'Unknown'
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` > 1.5) ,"Sample"] <- "HL60_R1_H3-Duplet"
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` > 1.5 & `Pat1-Ven-R1-H2` <0.55 & `Pat1-R1-H1` <0.75),"Sample"] <- "HL60_R1_H3"
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` < 1.5 & `Pat1-Ven-R1-H2` >0.55 & `Pat1-R1-H1` <0.75),"Sample"] <- "Pat1_Ven_R1_H2"
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` < 1.5 & `Pat1-Ven-R1-H2` <0.55 & `Pat1-R1-H1` >0.75),"Sample"] <- "Pat1_R1_H1"
DefaultAssay(object = CITE_GE_HTO_010) <- "RNA"
FeatureScatter(CITE_GE_HTO_010, feature1 = "hto_Pat1-R1-H1", feature2 = "hto_Pat1-Ven-R1-H2",group.by = "Sample")
table(CITE_GE_HTO_010@meta.data$Sample)
HL60_R1_H3 HL60_R1_H3-Duplet Pat1_R1_H1 Pat1_Ven_R1_H2
887 244 1435 3077
Unknown
862
A proper demultiplexing was conducted in the RMarkdown “Processing and annotation of S01_CITE” after removal of the cell line
CITE_GE_HTO_011 <- NormalizeData(CITE_GE_HTO_011, assay = "HTO", normalization.method = "CLR")
Normalizing across features
CITE_GE_HTO_011 <- ScaleData(CITE_GE_HTO_011, assay = "HTO")
Centering and scaling data matrix
FeaturePlot(CITE_GE_HTO_011, features = c("HL60-R2-H3"))
Warning: Could not find HL60-R2-H3 in the default search locations, found in HTO
assay instead
VlnPlot(CITE_GE_HTO_011, assay= "HTO",features = c("HL60-R2-H3"), ncol = 1, pt.size = 0.1) + NoLegend()
VlnPlot(CITE_GE_HTO_011, assay= "HTO",features = c("HL60-R2-H3"), ncol = 1, pt.size = 0.1, group.by = "orig.ident") + NoLegend()
RidgePlot(CITE_GE_HTO_011, features = "HL60-R2-H3", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Warning: Could not find HL60-R2-H3 in the default search locations, found in HTO
assay instead
Picking joint bandwidth of 0.0379
VlnPlot(CITE_GE_HTO_011, features = c("HL60-R2-H3"), ncol = 1, pt.size = 0.1, group.by = "orig.ident", y.max =6) + NoLegend() + geom_hline(yintercept=2)
Warning: Could not find HL60-R2-H3 in the default search locations, found in HTO
assay instead
Warning: Removed 32 rows containing non-finite values (stat_ydensity).
Warning: Removed 32 rows containing missing values (geom_point).
DefaultAssay(object = CITE_GE_HTO_011) <- "HTO"
CITE_GE_HTO_011@meta.data$Sample <- 'Unknown'
CITE_GE_HTO_011@meta.data[WhichCells(CITE_GE_HTO_011, expression = `HL60-R2-H3` > 2) ,"Sample"] <- "HL60_R2_H3"
DefaultAssay(object = CITE_GE_HTO_011) <- "RNA"
CITE_GE_HTO_010 <- NormalizeData(CITE_GE_HTO_010, assay = "Epi", normalization.method = "CLR")
Normalizing across features
CITE_GE_HTO_010 <- ScaleData(CITE_GE_HTO_010, assay = "Epi")
Centering and scaling data matrix
VlnPlot(CITE_GE_HTO_010, assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15"), ncol = 3, pt.size = 0.1,group.by ="orig.ident" ) + NoLegend()
FeaturePlot(CITE_GE_HTO_010, features = c("epi_CD19","CD3","CD16","epi_CD4","CD11c","CD56-NCAM","epi_CD14","CD8","CD45","CD34","CD15" ))
Warning: Could not find CD3 in the default search locations, found in Epi assay
instead
Warning: Could not find CD16 in the default search locations, found in Epi assay
instead
Warning: Could not find CD11c in the default search locations, found in Epi
assay instead
Warning: Could not find CD56-NCAM in the default search locations, found in Epi
assay instead
Warning: Could not find CD8 in the default search locations, found in Epi assay
instead
Warning: Could not find CD45 in the default search locations, found in Epi assay
instead
Warning: Could not find CD34 in the default search locations, found in Epi assay
instead
Warning: Could not find CD15 in the default search locations, found in Epi assay
instead
VlnPlot(CITE_GE_HTO_010, assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15" ), ncol = 3, pt.size = 0.1) + NoLegend()
The problem is that the overall counts for most of the epitopes are too low for a proper normalization and analysis
FeatureScatter(CITE_GE_HTO_010, feature1 = "rna_FUT4" , feature2 = "epi_CD15", slot = "counts" )
FeatureScatter(CITE_GE_HTO_010, feature1 = "epi_CD4" , feature2 = "epi_CD8" )
FeatureScatter(CITE_GE_HTO_010, feature1 = "rna_CD4" , feature2 = "rna_CD8B" )
RidgePlot(CITE_GE_HTO_010, features = c("epi_CD3", "epi_CD11c", "epi_CD8", "epi_CD16"), ncol = 2)
Picking joint bandwidth of 0.127
Picking joint bandwidth of 0.0948
Picking joint bandwidth of 0.168
Picking joint bandwidth of 0.14
RidgePlot(CITE_GE_HTO_010,assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15" ), idents = c("0", "1", "2", "3", "5", "6"), ncol = 3)
Picking joint bandwidth of 0.0974
Picking joint bandwidth of 0.0865
Picking joint bandwidth of 0.0835
Picking joint bandwidth of 0.0766
Picking joint bandwidth of 0.0841
Picking joint bandwidth of 0.0846
Picking joint bandwidth of 0.0877
Picking joint bandwidth of 0.095
Picking joint bandwidth of 0.0829
Picking joint bandwidth of 0.0577
Picking joint bandwidth of 0.0525
DefaultAssay(CITE_GE_HTO_010) <- "Epi"
CITE_GE_HTO_010 <- RunPCA(CITE_GE_HTO_010, features = rownames(CITE_GE_HTO_010), reduction.name = "pca_epi", reduction.key = "pca_Epi_",
verbose = FALSE)
Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large
a percentage of total singular values, use a standard svd instead.
Warning in irlba(A = t(x = object), nv = npcs, ...): did not converge--results
might be invalid!; try increasing work or maxit
Warning: Keys should be one or more alphanumeric characters followed by an
underscore, setting key from pca_Epi_ to pcaEpi_
Warning: All keys should be one or more alphanumeric characters followed by an
underscore '_', setting key to pcaEpi_
DimPlot(CITE_GE_HTO_010, reduction = "pca_epi")
epi.data <- GetAssayData(CITE_GE_HTO_010, slot = "data")
epi.dist <- dist(t(epi.data))
CITE_GE_HTO_010[["rnaClusterID"]] <- Idents(CITE_GE_HTO_010)
CITE_GE_HTO_010[["tsne_epi"]] <- RunTSNE(epi.dist, assay = "Epi", reduction.key = "epiTSNE_")
CITE_GE_HTO_010[["umap_epi"]] <- RunUMAP(epi.dist, assay = "Epi", reduction.key = "epiUMAP_")
16:52:50 UMAP embedding parameters a = 0.9922 b = 1.112
16:52:50 Read 6505 rows
16:52:50 Using Annoy for neighbor search, n_neighbors = 30
16:52:58 Commencing smooth kNN distance calibration using 1 thread
16:52:59 Initializing from normalized Laplacian + noise
16:52:59 Commencing optimization for 500 epochs, with 273140 positive edges
16:53:22 Optimization finished
CITE_GE_HTO_010[["epi_snn"]] <- FindNeighbors(epi.dist)$snn
Building SNN based on a provided distance matrix
Computing SNN
Warning: Adding a Graph without an assay associated with it
CITE_GE_HTO_010 <- FindClusters(CITE_GE_HTO_010, resolution = 0.2, graph.name = "epi_snn")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 6505
Number of edges: 197891
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9086
Number of communities: 8
Elapsed time: 0 seconds
Warning: Adding a command log without an assay associated with it
CITE_GE_HTO_010@meta.data$Epi_cluster <-CITE_GE_HTO_010@active.ident
clustering.table <- table(Idents(CITE_GE_HTO_010), CITE_GE_HTO_010$rnaClusterID)
clustering.table
0 1 2 3 4 5 6 7 8 9 10 11
0 508 542 365 344 313 54 164 114 4 80 4 2
1 554 460 318 269 239 34 130 115 6 55 7 5
2 182 156 106 66 49 90 33 32 53 9 12 12
3 50 30 14 7 7 381 6 16 2 3 1 12
4 7 6 4 3 3 2 3 1 142 0 1 0
5 21 29 22 10 12 16 7 4 8 1 0 0
6 4 2 0 2 1 1 0 0 41 0 55 0
7 21 7 11 3 3 25 5 7 0 0 0 0
DimPlot(CITE_GE_HTO_010, reduction = "umap_epi", pt.size = 0.5, label = TRUE, label.size = 10) + NoLegend()
This clustering outcome is not really useful as we barely have distinct clusters and the largest two clusters are dominant due to the absence of epitope markers:
RidgePlot(CITE_GE_HTO_010,assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15" ,"HL60-R1-H3" ), ncol = 3)
Warning: Could not find HL60-R1-H3 in the default search locations, found in HTO
assay instead
Picking joint bandwidth of 0.104
Picking joint bandwidth of 0.135
Picking joint bandwidth of 0.137
Picking joint bandwidth of 0.102
Picking joint bandwidth of 0.116
Picking joint bandwidth of 0.12
Picking joint bandwidth of 0.129
Picking joint bandwidth of 0.158
Picking joint bandwidth of 0.118
Picking joint bandwidth of 0.134
Picking joint bandwidth of 0.146
Picking joint bandwidth of 0.0677
The overall epitope counts also do not similar the epitope counts from the cytof experiment:
Y89DiCD45 27.98% Sm147DiCD11c 36.80% Gd158DiCD34 9.75% Yb173DiCD56 9.41% Ho165DiCD3 6.44% Lu175DiCD15 3.60% Nd142DiCD19 3.11% Nd144DiCD16 1.25% Gd160DiCD14 1.64%
The median counts of each epitope show the same picture:
library(matrixStats)
Attaching package: 'matrixStats'
The following object is masked from 'package:dplyr':
count
rownames(CITE_GE_HTO_010@assays$Epi@counts)
[1] "CD19" "CD3" "CD16" "CD4" "CD11c" "CD56-NCAM"
[7] "CD14" "CD8" "CD45" "CD34" "CD15"
rowMedians(as.matrix(CITE_GE_HTO_010@assays$Epi@counts))
[1] 2 6 5 4 22 4 2 1 3 18 135
As HL60 was not actively labelled in the experimental procedure these Epitope counts are not expected and show the large antibody diffusion that happened between mixing and library preparation
HL60_CITE <- subset(CITE_GE_HTO_010, subset = Sample == "HL60_R1_H3")
x <- as.array(HL60_CITE@assays$Epi@counts)
barplot(rowSums(x))
saveRDS(CITE_GE_HTO_010, file = "./Pat1_HL60_CITE.rds")
saveRDS(CITE_GE_HTO_011, file = "./Pat1_HL60_control.rds")